#include "FragmentShaderSemantics.fx"


//_________________________________________________________________________________________________

// Quality settings
// g_NumDirs  : number of sampling directions; loop bound in the ray marching and horizon ssao_px.
// g_NumSteps : number of steps taken along each direction / ray.
// g_NumRays  : number of elevation rays fired per direction.
static const float  g_NumDirs     = 8.0;
static const float  g_NumSteps    = 1.0;
static const float  g_NumRays     = 6.0; // Only used on RayMarching

// Some blur parameters below are not currently used - a hard-coded blur profile is used instead.
//static const float  g_BlurRadius  = 8.0;
//static const float  g_BlurFalloff = 0.05;
// Scales the squared depth difference inside the exponential weight of BlurFunction().
static const float  g_Sharpness   = 1.0;


// Horizon method only: each enabled switch compiles the matching AccumulatedHorizonOcclusion_*
// variant and adds its result inside the horizon ssao_px loop. The switches are independent
// #if blocks, so enabling more than one sums their contributions.
#define LOW_QUALITY_AO		0
#define NORMAL_QUALITY_AO	1
#define HIGH_QUALITY_AO		0

// Horizon method only: when 1, neighbour positions are intersected with a tangent plane built
// from the normal texture instead of being read straight from the depth buffer.
#define USE_NORMAL_TEX_AO	0

// Ray marching only: selects the sqrt-based elevation distribution for the rays.
#define COSINE_WEIGHTED

// Implementation selection. Each implementation defines its own ssao_px (and the horizon and
// monolith sections both define uv_to_eye / fetch_eye_pos), so exactly one should be set to 1.
#define USE_RAY_MARCHING	0
#define USE_HORIZON			0
#define USE_MONOLITH		1

// Texture samplers
// NOTE(review): color_tex is not referenced by any code visible in this file section, and
// source_tex is only read by BlurFunction() - confirm whether they are still bound by the host.
sampler2D color_tex					: register(s0);	/// full res color tex
sampler2D normal_tex				: register(s1);	/// full res normal tex
sampler2D depth_tex					: register(s2);	/// full res depth tex
sampler2D random_tex				: register(s3);	/// random tex
sampler2D source_tex				: register(s4);	/// full res source tex
//sampler2D stencil_tex				: register(s5);	/// full res stencil tex

// Uniform parameters
// The component layouts below are taken from the original per-line comments; the values are
// supplied by the host application.
uniform float4 g_Params  			: register(c110); // {Radius, LinearAtt, Contrast, InvRadius}
uniform float4 g_Params2			: register(c111); // {AngleBias, Tan(AngleBias), RadiusSq, InvRadiusSq }
uniform float4 g_Params3			: register(c112); // FAR {falloff start, falloff end, 1.0 / (falloff end - start), <free> }
uniform float4 g_Params4			: register(c113); // NEAR {falloff start, falloff end, 1.0 / (falloff end - start), <free> }
uniform float4 g_FocalLen			: register(c114); // 1/tan(FOVX/2), 1/tan(FOVY/2), tan(FOVX/2), tan(FOVY/2)
uniform float4 g_Resolution			: register(c115); // pixel width, pixel height, 1/pixel width, 1/pixel height
// Sampling directions; only .xy of the first g_NumDirs entries are read by the visible code.
uniform float4 g_Dirs[32]			: register(c116);


// Shader data formats
// Vertex shader input for the full-screen SSAO quad.
struct VS_IN
{
	float4 position   : POSITION;	///< clip space vertex position
	float2 tex		  : TEXCOORD0;	///< texture coordinate, passed through unchanged as VS_OUTSSAO.texUV
};

// Vertex shader output / pixel shader input shared by all SSAO implementations.
struct VS_OUTSSAO
{
	float4 position		: POSITION;		///< copy of the input clip space position
	float2 texUV		: TEXCOORD0;	///< texture coordinate used for all texture lookups
	float2 tex			: TEXCOORD1;	///< position.xy / g_FocalLen.xy; multiplied by depth to rebuild eye-space xy
};

//_________________________________________________________________________________________________

// Shared fragment shader function for depth texture reading.
// Exactly one variant of readDepthTexture() is compiled, selected by the platform defines
// (CG_PS3, _WINPC with _Z_RGB / _Z_FLOAT / neither, or the generic fallback).
// Parameters: tex   - depth sampler
//             uvTex - texture coordinate to read
// Returns the stored (non-linear) depth value; the packed variants rebuild it from three
// 8-bit channels, where 16777215 = 2^24 - 1 normalises the 24-bit integer.
#if defined(CG_PS3)

	inline float readDepthTexture(sampler2D tex, float2 uvTex)
	{
		float4 depthColor = tex2D(tex, uvTex).rgba;
		// Channel weights: a * 65536 + r * 256 + g; the b channel is ignored (weight 0).
		float4 depthFactor = float4(256.0/16777215.0, 1.0/16777215.0, 0.0f, 256.0*256.0/16777215.0);
		// round() recovers the integer 0..255 channel values before recombining them.
		return dot( round(depthColor*255.0), depthFactor );
	}

#elif defined(_WINPC)

	#if defined(_Z_RGB)

		inline float readDepthTexture(sampler2D tex, float2 uvTex)
		{
			// Explicit mip level 0 lookup.
			float4 depthColor = tex2Dlod(tex, float4(uvTex,0,0)).rgba;
			// Channel weights: r * 65536 + g * 256 + b.
			float3 depthFactor = float3(256.0*256.0/16777215.0, 256.0/16777215.0, 1.0/16777215.0);
			return dot(round(depthColor.rgb*255.0), depthFactor);
		}

	#elif defined(_Z_FLOAT)

		inline float readDepthTexture(sampler2D tex, float2 uvTex)
		{
			// Don't do tex2Dlod here, it breaks nVidia's hack!
			return tex2D(tex, uvTex).r;						
		}	

	#else // _Z_ARG

		inline float readDepthTexture(sampler2D tex, float2 uvTex)
		{
			// Explicit mip level 0 lookup.
			float4 depthColor = tex2Dlod(tex, float4(uvTex,0,0)).rgba;
			float3 depthFactor = float3(256.0*256.0/16777215.0, 256.0/16777215.0, 1.0/16777215.0);
			// The .arg swizzle yields channel weights: a * 65536 + r * 256 + g.
			return dot(round(depthColor.arg*255.0), depthFactor);
		}	

	#endif

#else

	inline float readDepthTexture(sampler2D tex, float2 uvTex)
	{
	#ifdef USE_INVERTED_PROJECTION_MTX
		// Flip the stored value when the inverted projection define is set.
		return 1.0 - tex2D(tex, uvTex).r;
	#else
		return tex2D(tex, uvTex).r;
	#endif
	}

#endif

// Reads the stored depth at uvTex and remaps it as
//     fs_projection_params.x / (1 - depth * fs_projection_params.w).
// NOTE(review): fs_projection_params is not declared in this file (presumably it comes from
// FragmentShaderSemantics.fx); callers treat the result as an eye-space z value.
float readDepthTextureLinear(sampler2D tex, float2 uvTex)
{
	const float storedDepth = readDepthTexture(tex, uvTex);
	const float denominator = 1.0 - storedDepth * fs_projection_params.w;
	return fs_projection_params.x / denominator;
}


// Entry point for the SSAO vertex shader shared by all pixel shader variants.
//
// For the full-screen quad the three outputs are laid out as follows:
//
//   position              texUV                 tex
//   (-1, 1)  (1, 1)       (0, 0)  (1, 0)        (-1, 1)/fl  (1, 1)/fl
//        +---+                 +---+                   +-----+
//        |   |                 |   |                   |     |
//        +---+                 +---+                   +-----+
//   (-1,-1)  (1,-1)       (0, 1)  (1, 1)        (-1,-1)/fl  (1,-1)/fl
//
// where fl is g_FocalLen.xy.
VS_OUTSSAO ssao_vx(VS_IN input)
{
	VS_OUTSSAO result;

	// Position and texture coordinate are forwarded untouched.
	result.texUV    = input.tex;
	result.position = input.position;

	// Clip-space xy divided by the focal length; scaling this by a depth value in the
	// pixel shader yields the matching eye-space xy.
	result.tex = input.position.xy / g_FocalLen.xy;

	return result;
}

#if USE_RAY_MARCHING

//=============================================================================================================================
// Ray Marching Method
//=============================================================================================================================

// Projects an eye-space ray sample onto the screen, reads the depth buffer there and returns
// the eye-space position of whatever the depth buffer holds at that screen location.
float3 fetch_eye_pos(float3 position)
{
	// Perspective divide: xy per unit of depth.
	const float2 perUnitDepth = position.xy / position.z;

	// Map to texture space; the negative y factors flip the vertical axis.
	const float2 screenUV = float2(0.5, -0.5) * ((g_FocalLen.xy * perUnitDepth) + float2(1.0, -1.0));

	const float sceneZ = readDepthTextureLinear(depth_tex, screenUV);
	return float3(perUnitDepth.xy * sceneZ, sceneZ);
}


// Entry point for ray marching SSAO pixel shader.
//
// For each of g_NumDirs jittered azimuthal directions, g_NumRays rays of increasing tilt are
// marched g_NumSteps steps through eye space. A ray counts as occluded at the first step
// where the depth buffer lies in front of the ray sample and the occluder is within the AO
// radius (g_Params.r) of the shaded point.
//
// Returns the accumulated occlusion scaled by the contrast (g_Params.b) in all four
// channels, or 1.0 in all channels for fragments with z > 100.
float4 ssao_px( VS_OUTSSAO In ) : COLOR
{
	//[branch]
	//if (tex2D(stencil_tex, In.texUV).g < 0.01)
	//	return 1.0;

	// Restrict rendering to (effectively) LEGO pieces
	// Note : This isn't generic. The normal.z is the 'Shadow Casting' flag, which for LEGO works out
	// as LEGO pieces.
	clip(0.1 - tex2Dlod(normal_tex, float4(In.texUV,0,0)).z);

	float z = readDepthTextureLinear(depth_tex, In.texUV);
	
	// Account for far plane
	if (z > 100.0)
	    return float4(1.0, 1.0, 1.0, 1.0);
	
	float3 P = float3(In.tex.xy * z, z);
 	
	// Reconstruct a signed 3D normal from the unsigned xy value stored in the normal buffer.
	float2 normal = tex2D(normal_tex, In.texUV).xy;
	normal = normal * 2.0 - 1.0;
	// Quantisation can push |normal.xy| slightly above 1; clamp so sqrt never sees a negative
	// argument (which would turn the whole result into NaN).
	float Nz = -sqrt(saturate(1.0 - (normal.x * normal.x + normal.y * normal.y)));
	float3 N = float3(normal.xy, Nz);

	// Build a tangent frame around N.
	// NOTE(review): this degenerates when N is parallel to the x axis (cross product is zero).
	float3 Tan   = float3(1,0,0);
	float3 BiTan = normalize(cross(N, Tan));
	Tan          = cross(BiTan, N);

	const float step_size = g_Params.r / g_NumSteps;

	// Per-pixel jitter from the tiled 64x64 random texture: xy rotates the directions,
	// z offsets the first step.
	float3 rand = tex2D(random_tex, frac(In.texUV * g_Resolution.xy / 64.0)).xyz;
	rand.xy = rand.xy * 2.0 - 1.0;

	// Every ray contributes equally; the weight does not depend on the direction.
	const float n_weight = 1.0 / (g_NumRays * g_NumDirs);
	
	float color = 0.0;

	// Process rays in g_NumDirs jittered azimuthal planes cutting through the zenith of the local surface hemisphere.
	for (int d = 0; d < g_NumDirs; d++) 
	{
		// Rotate the precomputed direction by the random 2D vector.
		float3 dir = float3(g_Dirs[d].x * rand.x - g_Dirs[d].y * rand.y, 
                            g_Dirs[d].x * rand.y + g_Dirs[d].y * rand.x, 
                            0.0);	
                            
		dir = dir.x * Tan + dir.y * BiTan;
		
		// Fire g_NumRays jittered rays of differing elevation in each azimuthal plane.
		for (float n = 1.0; n <= g_NumRays; n++) 
		{
			// Fraction in (0, 1); the small epsilon keeps it strictly below 1 for the last ray.
			// (Named 'elevation' so the frac() intrinsic is not shadowed.)
			float elevation = n / (g_NumRays + 1.5e-2);
			
			#ifdef COSINE_WEIGHTED
				float3 ndir = dir * sqrt(elevation) + N * sqrt(1 - elevation);
			#else
				float3 ndir = dir * elevation + N * sqrt(1.0 - elevation * elevation);
			#endif

			// Step out along the ray.
            for (float i = 1.0; i <= g_NumSteps; i++) 
            {
				float3 cur_ray = (i + rand.z) * step_size * ndir;
				float3 cur_pos = cur_ray + P;
				float3 tex_pos = fetch_eye_pos(cur_pos);
		
				// The depth buffer is in front of the ray sample: potential occluder.
				if (tex_pos.z - cur_pos.z < 0.0) 
				{
					float l = length(P - tex_pos);
					if (l < g_Params.r) 
					{
						// Linear distance attenuation (g_Params.g); stop marching this ray.
						color -= n_weight * (g_Params.r - g_Params.g * l) / g_Params.r;
						break;
					}
				}
			}
		}		
	}	

	//return 1.0 + color * g_Params.b;
	float occlusion = -color * g_Params.b;
	return float4(occlusion, occlusion, occlusion, occlusion);
}
#endif

//=============================================================================================================================
// Horizon Method
//=============================================================================================================================

#if USE_HORIZON

// pi / 2. Used by biased_tangent() and AccumulatedHorizonOcclusion_LQ().
#define HALF_PI     1.57079633


//----------------------------------------------------------------------------------
// Tangent of the elevation of sample S as seen from P: depth difference (P.z - S.z) over the
// distance between the two points in the xy plane.
float tangent(float3 P, float3 S)
{
    const float rise = P.z - S.z;
    const float run  = length(S.xy - P.xy);
    return rise / run;
}

//----------------------------------------------------------------------------------
// Converts a texture coordinate plus an eye-space depth into an eye-space position.
float3 uv_to_eye(float2 uv, float eye_z)
{
    // [0,1] texture space -> [-1,1] with the vertical axis flipped.
    const float2 ndc = uv * float2(2.0, -2.0) - float2(1.0, -1.0);

    // g_FocalLen.zw holds tan(FOV/2) per axis.
    const float2 eye_xy = ndc * g_FocalLen.zw * eye_z;
    return float3(eye_xy, eye_z);
}

//----------------------------------------------------------------------------------
// Returns the eye-space position stored in the depth buffer at texture coordinate uv.
float3 fetch_eye_pos(float2 uv)
{
    return uv_to_eye(uv, readDepthTextureLinear(depth_tex, uv));
}

//----------------------------------------------------------------------------------
// Returns the point where the view ray through uv meets the plane tangentPlane
// (xyz = normal, w = plane distance). When dot(normal, V) is not negative (silhouette edges)
// the depth-buffer position is returned unchanged.
float3 tangent_eye_pos(float2 uv, float4 tangentPlane)
{
    // Eye-space position at uv; also the direction of the view ray through that pixel.
    const float3 V = fetch_eye_pos(uv);
    const float NdotV = dot(tangentPlane.xyz, V);

    // Scale the ray onto the plane, except for silhouette edges.
    const float rayScale = (NdotV < 0.0) ? (tangentPlane.w / NdotV) : 1.0;
    return V * rayScale;
}

//----------------------------------------------------------------------------------
// Squared length of v.
float length2(float3 v)
{
    const float squaredLength = dot(v, v);
    return squaredLength;
}

//----------------------------------------------------------------------------------
// Of the two one-sided differences (Pr - P) and (P - Pl), returns the shorter one.
float3 min_diff(float3 P, float3 Pr, float3 Pl)
{
    const float3 forwardDiff  = Pr - P;
    const float3 backwardDiff = P - Pl;

    if (length2(forwardDiff) < length2(backwardDiff))
    {
        return forwardDiff;
    }
    return backwardDiff;
}

//----------------------------------------------------------------------------------
// Distance attenuation: 1 - g_Params.g * rSq, clamped to [0, 1].
float falloff(float rSq)
{
    const float attenuation = g_Params.g * rSq;
    return saturate(1.0 - attenuation);
}

//----------------------------------------------------------------------------------
// Rounds a texture-space offset to a whole number of pixels.
float2 snap_uv_offset(float2 uv)
{
    const float2 wholePixels = round(uv * g_Resolution.xy);
    return wholePixels * g_Resolution.zw;
}

//----------------------------------------------------------------------------------
// Moves a texture coordinate to the centre of the pixel that contains it.
float2 snap_uv_coord(float2 uv)
{
    //return (floor(uv * g_Resolution.xy) + 0.5) * g_Resolution.zw;

    // Offset, in pixels, of uv from the centre of its pixel.
    const float2 offsetFromCentre = frac(uv * g_Resolution.xy) - 0.5;
    return uv - offsetFromCentre * g_Resolution.zw;
}

//----------------------------------------------------------------------------------
// Converts the tangent of an angle into the sine of the same angle.
float tan_to_sin(float x)
{
    const float hypotenuse = sqrt(1.0 + x*x);
    return x / hypotenuse;
}

//----------------------------------------------------------------------------------
// Linear combination of the two tangent-plane basis vectors weighted by deltaUV.
float3 tangent_vector(float2 deltaUV, float3 dPdu, float3 dPdv)
{
    const float3 alongU = deltaUV.x * dPdu;
    const float3 alongV = deltaUV.y * dPdv;
    return alongU + alongV;
}

//----------------------------------------------------------------------------------
// Tangent of the elevation of vector T: negated z over the length of its xy part.
float tangent(float3 T)
{
    const float planarLength = length(T.xy);
    return -T.z / planarLength;
}

//----------------------------------------------------------------------------------
// Tangent of T's elevation after adding the angle bias (g_Params2.r), with the biased angle
// capped at HALF_PI.
float biased_tangent(float3 T)
{
    const float biasedAngle = atan(tangent(T)) + g_Params2.r;
    const float cappedAngle = min(biasedAngle, HALF_PI);
    return tan(cappedAngle);
}

//----------------------------------------------------------------------------------
// Marches numSteps samples from uv in steps of deltaUV and adds the horizon-based occlusion
// found along that direction to ao.
//   ao         - running occlusion sum (in/out)
//   P          - eye-space position of the shaded point
//   uv         - starting texture coordinate (advanced before the first sample)
//   deltaUV    - texture-space step
//   numSteps   - number of samples to take
//   tanH, sinH - tangent / sine of the starting horizon angle
void integrate_direction(inout float ao, float3 P, float2 uv, float2 deltaUV,
                         float numSteps, float tanH, float sinH)
{
    float2 sampleUV     = uv;
    float  horizonTan   = tanH;
    float  horizonSin   = sinH;

    for (float stepIndex = 1.0; stepIndex <= numSteps; ++stepIndex)
    {
        sampleUV += deltaUV;
        float3 samplePos = fetch_eye_pos(sampleUV);

        // Samples outside the radius of influence (g_Params2.b = radius squared) are ignored.
        float distSq = length2(samplePos - P);
        if (distSq < g_Params2.b)
        {
            float sampleTan = tangent(P, samplePos);

            [branch]
            if (sampleTan > horizonTan)
            {
                // The sample rises above the current horizon: accumulate the AO between the
                // old horizon and the sample, attenuated by normalised squared distance.
                float sampleSin = sampleTan / sqrt(1.0 + sampleTan * sampleTan);
                float normalisedDistSq = distSq * g_Params2.a;
                ao += falloff(normalisedDistSq) * (sampleSin - horizonSin);

                // The sample becomes the new horizon.
                horizonTan = sampleTan;
                horizonSin = sampleSin;
            }
        }
    }
}

//----------------------------------------------------------------------------------
#if LOW_QUALITY_AO
// Low quality variant: integrates a direction and its opposite, both starting from a horizon
// derived from the angle bias only (no per-pixel tangent plane).
//   deltaUV  - texture-space step for this direction
//   uv0      - texture coordinate of the shaded pixel
//   P        - eye-space position of the shaded pixel
//   numSteps - samples per direction
//   randstep - jitter applied to the starting offset
float AccumulatedHorizonOcclusion_LQ(float2 deltaUV, 
                                     float2 uv0, 
                                     float3 P, 
                                     float numSteps, 
                                     float randstep)
{
    // Jittered start within the first sample distance (uses the unsnapped step).
    const float2 forwardStart = uv0 + snap_uv_offset(randstep * deltaUV);

    // Pixel-snapped step so xy and z sample locations agree and samples lie on a line.
    const float2 forwardStep = snap_uv_offset(deltaUV);

    // Starting horizon: straight down, lifted by the angle bias.
    const float startTan = tan(-HALF_PI + g_Params2.r);
    float startSin;
    if (g_Params2.r != 0.0)
    {
        startSin = tan_to_sin(startTan);
    }
    else
    {
        startSin = -1.0;
    }

    float occlusion = 0.0;
    integrate_direction(occlusion, P, forwardStart, forwardStep, numSteps, startTan, startSin);

    // Opposite direction, integrated into the same sum.
    const float2 backwardStep  = -forwardStep;
    const float2 backwardStart = uv0 + snap_uv_offset(randstep * backwardStep);
    integrate_direction(occlusion, P, backwardStart, backwardStep, numSteps, startTan, startSin);

    // Halve because two directions were summed; subtract 1 and clamp to drop the part
    // below the surface.
    return max(occlusion * 0.5 - 1.0, 0.0);
}
#endif

//----------------------------------------------------------------------------------
#if NORMAL_QUALITY_AO
// Normal quality variant: the starting horizon is the biased tangent of the surface along
// deltaUV, then samples are marched unsnapped and unjittered from uv0.
//   deltaUV    - texture-space step for this direction
//   uv0        - texture coordinate of the shaded pixel
//   P          - eye-space position of the shaded pixel
//   numSteps   - samples to take
//   randstep   - unused here (start jitter is disabled)
//   dPdu, dPdv - screen-aligned basis of the tangent plane
float AccumulatedHorizonOcclusion_NQ(float2 deltaUV, 
                                    float2 uv0, 
                                    float3 P, 
                                    float numSteps, 
                                    float randstep,
                                    float3 dPdu,
                                    float3 dPdv)
{
    // Start jitter and pixel snapping are intentionally disabled in this variant:
    //   uv      = uv0 + snap_uv_offset(randstep * deltaUV);
    //   deltaUV = snap_uv_offset(deltaUV);

    // Surface tangent along the marching direction gives the initial horizon.
    const float3 surfaceTangent = tangent_vector(deltaUV, dPdu, dPdv);
    const float  startTan = biased_tangent(surfaceTangent);
    const float  startSin = tan_to_sin(startTan);

    // The marching loop is the shared one.
    float occlusion = 0.0;
    integrate_direction(occlusion, P, uv0, deltaUV, numSteps, startTan, startSin);

    return occlusion;
}
#endif

//----------------------------------------------------------------------------------
#if HIGH_QUALITY_AO
// High quality variant: samples are snapped to pixel centres and each contribution is
// measured against the tangent-plane angle at the snapped sample location.
//   deltaUV    - texture-space step for this direction
//   uv0        - texture coordinate of the shaded pixel
//   P          - eye-space position of the shaded pixel
//   numSteps   - samples to take
//   randstep   - jitter applied to the first sample
//   dPdu, dPdv - screen-aligned basis of the tangent plane
float AccumulatedHorizonOcclusion_HQ(float2 deltaUV, 
                                     float2 uv0, 
                                     float3 P, 
                                     float numSteps, 
                                     float randstep,
                                     float3 dPdu,
                                     float3 dPdv)
{
    // Jittered first sample within the first sample distance.
    float2 marchUV = (uv0 + deltaUV) + randstep * deltaUV;

    // Initial horizon: tangent at the pixel-snapped first offset plus tan(angle bias).
    const float2 firstOffset = snap_uv_offset(marchUV - uv0);
    float horizonTan = tangent(tangent_vector(firstOffset, dPdu, dPdv)) + g_Params2.g;

    float occlusion  = 0.0;
    float prevHeight = 0.0;
    for (float stepIndex = 0.0; stepIndex < numSteps; ++stepIndex)
    {
        const float2 pixelUV   = snap_uv_coord(marchUV);
        const float3 samplePos = fetch_eye_pos(pixelUV);
        marchUV += deltaUV;

        // Samples outside the radius of influence are ignored.
        const float distSq = length2(samplePos - P);
        if (distSq < g_Params2.b)
        {
            const float sampleTan = tangent(P, samplePos);

            [branch]
            if (sampleTan > horizonTan)
            {
                // Tangent-plane angle associated with the snapped sample location.
                const float3 planeTangent = tangent_vector(pixelUV - uv0, dPdu, dPdv);
                const float  planeTan = tangent(planeTangent) + g_Params2.g;

                // AO between the tangent plane and the sample; only the increase over the
                // previous contribution is added.
                const float height = tan_to_sin(sampleTan) - tan_to_sin(planeTan);
                occlusion += falloff(distSq * g_Params2.a) * (height - prevHeight);
                prevHeight = height;

                // The sample becomes the new horizon.
                horizonTan = sampleTan;
            }
        }
    }
    return occlusion;
}
#endif

//----------------------------------------------------------------------------------

// Entry point for horizon based SSAO pixel shader.
//
// Rebuilds the eye-space position of the pixel, derives a screen-aligned tangent basis from
// its neighbours, then sums the horizon occlusion over g_NumDirs directions using whichever
// AccumulatedHorizonOcclusion_* variants are enabled.
//
// Returns the occlusion amount in alpha with rgb = 0.
float4 ssao_px( VS_OUTSSAO In ) : COLOR
{
 	// Restrict rendering to (effectively) LEGO pieces
	// Note : This isn't generic. The normal.z is the 'Shadow Casting' flag, which for LEGO works out
	// as LEGO pieces.
	//clip(0.1 - tex2Dlod(normal_tex, float4(In.texUV,0,0)).z);

	float3 P = fetch_eye_pos(In.texUV);
    
	//clip(g_Params3.y - P.z);

	// Projected size of the AO radius (g_Params.r) in texture space at this depth.
	float2 step_size = 0.5 * g_Params.r * g_FocalLen.xy / P.z;

	// The following optimisation causes loop unrolling issues, since numSteps is no longer constant.
	float numSteps = g_NumSteps;
	//float tempNumSteps = min(g_NumSteps, min(step_size.x * g_Resolution.x, step_size.y * g_Resolution.y));
	//clip(tempNumSteps - 1.0);

    step_size = step_size / (numSteps + 1.0);

	// Nearest neighbour pixels on the tangent plane
	float3 Pr, Pl, Pt, Pb;

	#if USE_NORMAL_TEX_AO
		// Reconstruct a signed 3D normal from the unsigned xy value stored in the normal texture.
		float2 rawN = tex2Dlod(normal_tex, float4(In.texUV,0,0)).xy;
		rawN = rawN * 2.0 - 1.0;
		// Quantisation can push |rawN| slightly above 1; clamp so sqrt never sees a negative
		// argument (which would produce NaN).
		float Nz = -sqrt(saturate(1.0 - (rawN.x * rawN.x + rawN.y * rawN.y)));
		float3 N = float3(rawN, Nz);

		float4 tangentPlane = float4(N, dot(P, N));
		Pr = tangent_eye_pos(In.texUV + float2( g_Resolution.z, 0.0), tangentPlane);
		Pl = tangent_eye_pos(In.texUV + float2(-g_Resolution.z, 0.0), tangentPlane);
		Pt = tangent_eye_pos(In.texUV + float2(0.0,  g_Resolution.w), tangentPlane);
		Pb = tangent_eye_pos(In.texUV + float2(0.0, -g_Resolution.w), tangentPlane);
	#else
		Pr = fetch_eye_pos(In.texUV + float2( g_Resolution.z, 0.0));
		Pl = fetch_eye_pos(In.texUV + float2(-g_Resolution.z, 0.0));
		Pt = fetch_eye_pos(In.texUV + float2(0.0,  g_Resolution.w));
		Pb = fetch_eye_pos(In.texUV + float2(0.0, -g_Resolution.w));
	#endif
    
	// Screen-aligned basis for the tangent plane
	// (dPdv is rescaled by pixel height / pixel width).
	float3 dPdu = min_diff(P, Pr, Pl);
	float3 dPdv = min_diff(P, Pt, Pb) * (g_Resolution.y * g_Resolution.z);

    // Random vector: (cos(alpha), sin(alpha), jitter)
	// Per-pixel randomisation is currently disabled; a fixed identity rotation is used.
	float3 rand_Dir = float3(1,0,0);//tex2D(random_tex, frac(In.texUV * g_Resolution.xy / 64.0));
	//rand_Dir.xy = rand_Dir.xy * 2.0 - 1.0;
	
	float ao = 0.0;

	for (int d = 0; d < g_NumDirs; d++)
	{
		//float2 deltaUV = float2(	g_Dirs[d].x * rand_Dir.x - g_Dirs[d].y * rand_Dir.y, 
		//							g_Dirs[d].x * rand_Dir.y + g_Dirs[d].y * rand_Dir.x);

		float2 deltaUV = float2(g_Dirs[d].x, g_Dirs[d].y);

		deltaUV *= step_size.xy;

		#if LOW_QUALITY_AO
			ao += AccumulatedHorizonOcclusion_LQ(deltaUV, In.texUV, P, numSteps, rand_Dir.z);
		#endif

		#if NORMAL_QUALITY_AO
			ao += AccumulatedHorizonOcclusion_NQ(deltaUV, In.texUV, P, numSteps, rand_Dir.z, dPdu, dPdv);
		#endif

		#if HIGH_QUALITY_AO
			ao += AccumulatedHorizonOcclusion_HQ(deltaUV, In.texUV, P, numSteps, rand_Dir.z, dPdu, dPdv);
		#endif
	}

	// Apply view-space attenuation
	ao *= 1.0 - saturate((P.z - g_Params3.x) * g_Params3.z);

	// Average over the directions and apply the contrast.
	ao = (ao / g_NumDirs) * g_Params.b;

	// Occlusion is written to alpha only.
	// Debug alternative (greyscale visibility), previously left as unreachable code after
	// the return:
	//   ao = 1-ao;
	//   return float4(ao,ao,ao,1);
	return float4(0,0,0,ao);
}

//----------------------------------------------------------------------------------

// Not currently used.
// Depth-aware blur tap: reads source_tex at uv, reduces the incoming weight w according to
// the squared depth difference from center_d (scaled by g_Sharpness), adds the resulting
// weight to w_total and returns the weighted sample.
float BlurFunction(float2 uv, float center_d, inout float w_total, float w)
{
    // Only the first channel of the source texture is used.
    const float sourceValue = tex2D(source_tex, uv).x;
    const float depthDelta  = readDepthTextureLinear(depth_tex, uv) - center_d;

    w *= exp(-depthDelta * depthDelta * g_Sharpness);
    w_total += w;

    return w * sourceValue;
}

#endif


//=============================================================================================================================
// Monolith implementation
//=============================================================================================================================

#if USE_MONOLITH

// NOTE(review): within the visible part of this file the AO_* tuning constants below are
// referenced only by the commented-out _ssao_px; the active ssao_px takes its parameters
// from the g_Params* uniforms instead.

// fall-off definitions
#define AO_FAR_FALLOFF_START			7.0f				// the effect will begin to fall off at this distance from the viewer
#define AO_FAR_FALLOFF_RANGE			3.0f				// the effect will fall off over this range

#define AO_NEAR_CUTOFF_RANGE			1.5f				// disable the effect within this distance (for perf reasons)
#define AO_NEAR_FADE_RANGE				0.5f				// fade in at the near point over this distance

// noise definitions
#define AO_NOISE_SCALE					1.0f				// the amount we will scale the noise texture coordinates
#define AO_NOISE_WEIGHT					0.5f				// the weight that the noise should factor into the computation (0 = no effect, 1 = full)

// sample definitions
//#define AO_SAMPLE_OFFSET				0.5773502691896f	// the unit offset from the base world coordinates; used to define the sphere we sample in
#define AO_SAMPLE_OFFSET				1.0
#define AO_SAMPLE_COUNT					8					// the number of samples we take

// influence definitions
// NOTE(review): with MIN = 0.26 and MAX = 0.80, 1.0 / (MAX - MIN) is about 1.85, not the
// 2.00 defined for AO_INFLUENCE_ONE_OVER_RANGE - confirm which of the three values is intended.
#define AO_INFLUENCE_DISTANCE			0.45f				// the world unit distance that within which we consider depth values
#define AO_INFLUENCE_MIN				0.26f				// the occlusion factor we'll consider the minimum value
#define AO_INFLUENCE_MAX				0.80f				// the occlusion factor we'll consider the maximum value
#define AO_INFLUENCE_ONE_OVER_RANGE		2.00f				// must be equivalent to 1.0 / (AO_INFLUENCE_MAX - AO_INFLUENCE_MIN)

// sample radius
// These values control the radius that should be used for sampling. The radius goes from the
// minimum to the maximum over the range specified.
#define AO_RADIUS_MIN					0.025f				// the radius to use when at the minimum dist
#define AO_RADIUS_MAX					0.025f				// the radius to use when at the maximum dist
#define AO_RADIUS_START_DIST			2.5f				// the distance at which we start interpolating from min to max radius
#define AO_RADIUS_RANGE					8.0f				// the range over which we go from the minimum to maximum radius

//----------------------------------------------------------------------------------
// Converts a texture coordinate plus an eye-space depth into an eye-space position.
float3 uv_to_eye(float2 uv, float eye_z)
{
    // [0,1] texture space -> [-1,1] with the vertical axis flipped.
    const float2 ndc = uv * float2(2.0, -2.0) - float2(1.0, -1.0);

    // g_FocalLen.zw holds tan(FOV/2) per axis.
    const float2 eye_xy = ndc * g_FocalLen.zw * eye_z;
    return float3(eye_xy, eye_z);
}

//----------------------------------------------------------------------------------
// Returns the eye-space position stored in the depth buffer at texture coordinate uv.
float3 fetch_eye_pos(float2 uv)
{
    return uv_to_eye(uv, readDepthTextureLinear(depth_tex, uv));
}

//----------------------------------------------------------------------------------

// Entry point for the "monolith" SSAO pixel shader.
//
// Estimates the local depth slope from the right/top neighbour pixels, predicts the depth a
// flat surface would have at each sample location, and counts a sample as occluding when the
// depth buffer is nearer than that prediction by more than a small threshold and lies within
// the influence distance. Samples are processed four at a time as float4s.
//
// Returns the occlusion amount in alpha with rgb = 0. Fragments beyond g_Params3.y or
// nearer than g_Params4.y are discarded.
float4 ssao_px(VS_OUTSSAO In) : COLOR
{
	float ao = 0;

	float3 fragPosition = fetch_eye_pos(In.texUV);

	// Discard outside the [near fall-off end, far fall-off end] depth range.
	clip(g_Params3.y - fragPosition.z);
	clip(fragPosition.z - g_Params4.y);

	float2 dZdXY;

#if 0

	// Alternative: derive the depth slope from the normal texture. The normal is only
	// fetched here because nothing else in this shader uses it.
	float3 fragNormal = tex2Dlod(normal_tex, float4(In.texUV,0,0)).xyz * 2.0 - 1.0;
	fragNormal.z = -sqrt(1.0 - (fragNormal.x * fragNormal.x + fragNormal.y * fragNormal.y));

	dZdXY = -fragNormal.xy / fragNormal.z;

#else

	// Forward differences of linear depth towards the right and top neighbour pixels.
	float depthRight = readDepthTextureLinear(depth_tex, In.texUV + float2(g_Resolution.z, 0));
	float depthTop = readDepthTextureLinear(depth_tex, In.texUV + float2(0, g_Resolution.w));

	float2 deltaZ = float2(depthRight, depthTop) - fragPosition.z;
	// Eye-space size of one pixel step at this depth.
	float2 deltaXY = g_Resolution.zw * 2.0 * g_FocalLen.zw * fragPosition.z;

	dZdXY = deltaZ / deltaXY;

#endif
    

	float radius = g_Params.r;
	// Projected size of the radius in texture space at this depth.
	float2 radiusUV = 0.5 * g_Params.r * g_FocalLen.xy / fragPosition.z;

	// Per-pixel jitter. Currently unused because the jitter lines below are commented out.
	float3 rand = tex2D(random_tex, frac(In.texUV * g_Resolution.xy / 64.0)).xyz;
	rand.xy = rand.xy * 2.0 - 1.0;
	rand *= g_Params.y;

	// Eight unit directions packed two per float4 (xy and zw).
	float4 sampleDir0 = float4(-0.7071, -0.7071,  0.7071, -0.7071);
	float4 sampleDir1 = float4( 0.7071,  0.7071, -0.7071,  0.7071);
	float4 sampleDir2 = float4(   0, -1.0,  1.0,    0);
	float4 sampleDir3 = float4(   0,  1.0, -1.0,    0);

	//sampleDir0 += rand.xyxy;
	//sampleDir1 += rand.xyxy;
	//sampleDir2 += rand.xyxy;
	//sampleDir3 += rand.xyxy;

	// Sample coordinates: sets 0-3 at one radius, sets 4-7 at twice the radius.
	float4 sampleUV0 = In.texUV.xyxy + sampleDir0 * radiusUV.xyxy;
	float4 sampleUV1 = In.texUV.xyxy + sampleDir1 * radiusUV.xyxy;
	float4 sampleUV2 = In.texUV.xyxy + sampleDir2 * radiusUV.xyxy;
	float4 sampleUV3 = In.texUV.xyxy + sampleDir3 * radiusUV.xyxy;
	float4 sampleUV4 = In.texUV.xyxy + sampleDir0 * radiusUV.xyxy * 2.0;
	float4 sampleUV5 = In.texUV.xyxy + sampleDir1 * radiusUV.xyxy * 2.0;
	float4 sampleUV6 = In.texUV.xyxy + sampleDir2 * radiusUV.xyxy * 2.0;
	float4 sampleUV7 = In.texUV.xyxy + sampleDir3 * radiusUV.xyxy * 2.0;

	// Matching eye-space xy offsets of the samples.
	float4 sampleXYdelta0 = sampleDir0 * radius.xxxx;
	float4 sampleXYdelta1 = sampleDir1 * radius.xxxx;
	float4 sampleXYdelta2 = sampleDir2 * radius.xxxx;
	float4 sampleXYdelta3 = sampleDir3 * radius.xxxx;
	float4 sampleXYdelta4 = sampleDir0 * radius.xxxx * 2.0;
	float4 sampleXYdelta5 = sampleDir1 * radius.xxxx * 2.0;
	float4 sampleXYdelta6 = sampleDir2 * radius.xxxx * 2.0;
	float4 sampleXYdelta7 = sampleDir3 * radius.xxxx * 2.0;


	// Depth buffer values at the sample locations.
	float4 sampleDepths[4];
	sampleDepths[0].x = readDepthTextureLinear(depth_tex, sampleUV0.xy);
	sampleDepths[0].y = readDepthTextureLinear(depth_tex, sampleUV0.zw);
	sampleDepths[0].z = readDepthTextureLinear(depth_tex, sampleUV1.xy);
	sampleDepths[0].w = readDepthTextureLinear(depth_tex, sampleUV1.zw);
	sampleDepths[1].x = readDepthTextureLinear(depth_tex, sampleUV2.xy);
	sampleDepths[1].y = readDepthTextureLinear(depth_tex, sampleUV2.zw);
	sampleDepths[1].z = readDepthTextureLinear(depth_tex, sampleUV3.xy);
	sampleDepths[1].w = readDepthTextureLinear(depth_tex, sampleUV3.zw);
	sampleDepths[2].x = readDepthTextureLinear(depth_tex, sampleUV4.xy);
	sampleDepths[2].y = readDepthTextureLinear(depth_tex, sampleUV4.zw);
	sampleDepths[2].z = readDepthTextureLinear(depth_tex, sampleUV5.xy);
	sampleDepths[2].w = readDepthTextureLinear(depth_tex, sampleUV5.zw);
	sampleDepths[3].x = readDepthTextureLinear(depth_tex, sampleUV6.xy);
	sampleDepths[3].y = readDepthTextureLinear(depth_tex, sampleUV6.zw);
	sampleDepths[3].z = readDepthTextureLinear(depth_tex, sampleUV7.xy);
	sampleDepths[3].w = readDepthTextureLinear(depth_tex, sampleUV7.zw);


	// Depth a planar surface through the fragment would have at each sample
	// (fragment depth plus slope times xy offset).
	float4 expectedSampleDepths[4];
	expectedSampleDepths[0].x = fragPosition.z + dot(1, dZdXY * sampleXYdelta0.xy);
	expectedSampleDepths[0].y = fragPosition.z + dot(1, dZdXY * sampleXYdelta0.zw);
	expectedSampleDepths[0].z = fragPosition.z + dot(1, dZdXY * sampleXYdelta1.xy);
	expectedSampleDepths[0].w = fragPosition.z + dot(1, dZdXY * sampleXYdelta1.zw);
	expectedSampleDepths[1].x = fragPosition.z + dot(1, dZdXY * sampleXYdelta2.xy);
	expectedSampleDepths[1].y = fragPosition.z + dot(1, dZdXY * sampleXYdelta2.zw);
	expectedSampleDepths[1].z = fragPosition.z + dot(1, dZdXY * sampleXYdelta3.xy);
	expectedSampleDepths[1].w = fragPosition.z + dot(1, dZdXY * sampleXYdelta3.zw);
	expectedSampleDepths[2].x = fragPosition.z + dot(1, dZdXY * sampleXYdelta4.xy);
	expectedSampleDepths[2].y = fragPosition.z + dot(1, dZdXY * sampleXYdelta4.zw);
	expectedSampleDepths[2].z = fragPosition.z + dot(1, dZdXY * sampleXYdelta5.xy);
	expectedSampleDepths[2].w = fragPosition.z + dot(1, dZdXY * sampleXYdelta5.zw);
	expectedSampleDepths[3].x = fragPosition.z + dot(1, dZdXY * sampleXYdelta6.xy);
	expectedSampleDepths[3].y = fragPosition.z + dot(1, dZdXY * sampleXYdelta6.zw);
	expectedSampleDepths[3].z = fragPosition.z + dot(1, dZdXY * sampleXYdelta7.xy);
	expectedSampleDepths[3].w = fragPosition.z + dot(1, dZdXY * sampleXYdelta7.zw);

	float sampleCount = 0;

	float influenceDistance = 0.3;
	//float influenceDistance = radius * 2;

	// NOTE(review): the loop bound is 2, so the 'i==2' branch below never executes and the
	// double-radius sample sets (sampleDepths[2] and [3]) are never accumulated. The bound is
	// left unchanged because raising it to 4 alters both the tuned look and the cost -
	// confirm which behaviour is intended.
	#ifndef CG_PS3
	[unroll]
	#endif
	for (int i = 0; i<2; i++)
	{
		if (i==2)
		{
			influenceDistance *= 2;
			radius *= 2;
		}

		// Positive when the depth buffer is nearer than the planar prediction.
		float4 vDistance = expectedSampleDepths[i] - sampleDepths[i];

		// Squared distance from the fragment to the sample. Plain multiplication is used
		// instead of pow(x, 2) because the depth difference can be negative and pow() is
		// not defined for a negative base.
		float4 vDepthDelta = fragPosition.zzzz - sampleDepths[i];
		float4 vRadiusSq = vDepthDelta * vDepthDelta + radius.xxxx * radius.xxxx;
		float influenceDistanceSq = influenceDistance * influenceDistance;

		// 1 where the occluder is at least 0.04 in front of the predicted surface.
		float4 vDistanceGreaterEqualZero = step(0.04, vDistance);
		// 1 where the sample lies within the influence distance.
		float4 vDistanceSmallerThanInfluenceDistance = step(vRadiusSq, influenceDistanceSq);
		// Quadratic fall-off towards the influence distance.
		float4 vNormalizedDistance = 1 - vRadiusSq / influenceDistanceSq;

		float4 vAO = vDistanceGreaterEqualZero * vDistanceSmallerThanInfluenceDistance * vNormalizedDistance;


		//float4 vDistance = sqrt(pow(fragPosition.zzzz - sampleDepths[i],2) + radius.xxxx * radius.xxxx);

		//float4 vNormalizedDistance = vDistance / influenceDistance;

		//float4 vExpectedTan = -(expectedSampleDepths[i] - fragPosition.z) / radius.xxxx;
		//float4 vExpectedSin = vExpectedTan / sqrt(1.0 + vExpectedTan * vExpectedTan);

		//float4 vSampleTan = -(sampleDepths[i] - fragPosition.z) / radius.xxxx;
		//float4 vSampleSin = vSampleTan / sqrt(1.0 + vSampleTan * vSampleTan);

		//float4 vAO = 1;

		//vAO *= step(vDistance, influenceDistance);
		//vAO *= step(0.01, expectedSampleDepths[i] - sampleDepths[i]);
		//vAO *= step(vExpectedTan, vSampleTan);
		//vAO *= 1 - pow(vNormalizedDistance,2);
		////vAO *= (vSampleSin - vExpectedSin);

		ao += dot(vAO, 1);

		sampleCount += 4;
	}

	// Average over the processed samples and apply the contrast.
	ao /= sampleCount;
	ao *= g_Params.b;

	// Fade towards screen edges where we're not sampling correct Z values.
	// The fade is computed per axis. Both axes are applied here: multiplying the scalar by the
	// float2 directly would silently keep only the x component. Each factor is clamped to
	// [0, 1] so that it cannot turn negative close to the border.
	float2 edgeOvershoot = saturate((abs(In.texUV.xy - 0.5) + radiusUV.xy) / 0.5 - 1);
	float2 edgeFade = saturate(1 - edgeOvershoot / radiusUV.xy);
	ao *= edgeFade.x * edgeFade.y;

	// Apply view-space attenuation
	ao *= 1.0 - saturate((fragPosition.z - g_Params3.x) * g_Params3.z);
	ao *= saturate((fragPosition.z - g_Params4.x) * g_Params4.z);

	// Occlusion is written to alpha only.
	// Debug alternative (greyscale visibility), previously left as unreachable code after
	// the return:
	//   ao=1-ao;
	//   return float4(ao,ao,ao,1);
	return float4(0,0,0,ao);
}

//float4 _ssao_px( VS_OUTSSAO In ) : COLOR
//{
//	// Restrict rendering to (effectively) LEGO pieces
//	// Note : This isn't generic. The normal.z is the 'Shadow Casting' flag, which for LEGO works out
//	// as LEGO pieces.
//	//clip(0.1 - tex2Dlod(normal_tex, float4(In.texUV,0,0)).z);
//
//	float3 vCameraPosition = fetch_eye_pos(In.texUV);
//	float ao = 0;
//
//	float3 fragmentNormal = tex2Dlod(normal_tex, float4(In.texUV,0,0)).xyz * 2.0 - 1.0;
//	fragmentNormal.z = -sqrt(1.0 - (fragmentNormal.x * fragmentNormal.x + fragmentNormal.y * fragmentNormal.y));
//
//	// we only want this effect to be visible on pixels close to the viewer,
//	// so compute the fall off amount
//	float fNearFallOff = (vCameraPosition.z - AO_NEAR_CUTOFF_RANGE) / AO_NEAR_FADE_RANGE;
//	float fFarFallOff = 1.0f - (vCameraPosition.z - AO_FAR_FALLOFF_START) / AO_FAR_FALLOFF_RANGE;
//	float fFallOff = min(1.0f, (min(fNearFallOff, fFarFallOff)));
//	
//	// we only want to influence close pixels
//	//$ATTRIB_BRANCH;
//	// XDC: This [ifAny] generates a warning, but the shader runs faster and it looks visually correct
//	// You can always disable the warning with this pragma, but you'd have to add something to your
//	// shader interpreter to handle this in the Xbox 360 case
//	//#pragma warning( disable : 3554 )
//	//$ATTRIB_IFANY;
//	//if (fFallOff > 0.0f)
//	{
//		// XDC: Generate noise procedurally rather than from a noise texture. The GPU has to stall
//		// when there is a tfetch depending on another tfetch. vCameraPosition is a 3 component,
//		// so to average, do dot with 0.3333 )
//		float fNoiseResult = dot( frac( vCameraPosition ), 0.3333f );
//		
//		fNoiseResult = (fNoiseResult * AO_NOISE_WEIGHT) + (1.0f - AO_NOISE_WEIGHT);
//
//		float fUnitDist = saturate((vCameraPosition.z - AO_RADIUS_START_DIST) / AO_RADIUS_RANGE);
//		float fLerpRadius = lerp(AO_RADIUS_MIN, AO_RADIUS_MAX, fUnitDist);
//	
//		float fAOOffset = AO_SAMPLE_OFFSET * fLerpRadius;// * fNoiseResult;
//		float fNearZ = vCameraPosition.z;// - fAOOffset;
//		float fFarZ  = vCameraPosition.z;// + fAOOffset;
//
//		// VGP: Calculate offset in UV space
//		float2 UVOffsetNear = fAOOffset * g_FocalLen.xy / fNearZ * 0.5;
//		float2 UVOffsetFar = fAOOffset * g_FocalLen.xy / fFarZ * 0.5;
//		
//		UVOffsetNear =  4.0 * g_Resolution.zw;
//		UVOffsetFar  = 8.0 * g_Resolution.zw;
//
//		// XDC: Use 4x float4's rather than 8x float2's
//		// fill out our set of direction vectors we'll sample the depth buffer from			
//		float4 vSphereOffset_00 = float4(-1.0, -1.0,  1.0, -1.0);
//		float4 vSphereOffset_01 = float4( 1.0,  1.0, -1.0,  1.0);
//		float4 vSphereOffset_02 = float4(   0, -1.0,  1.0,    0);
//		float4 vSphereOffset_03 = float4(   0,  1.0, -1.0,    0);
//
//
//		float4 vTex_00 = In.texUV.xyxy + vSphereOffset_00 * UVOffsetNear.xyxy;
//		float4 vTex_01 = In.texUV.xyxy + vSphereOffset_01 * UVOffsetNear.xyxy;
//		float4 vTex_02 = In.texUV.xyxy + vSphereOffset_02 * UVOffsetFar.xyxy;
//		float4 vTex_03 = In.texUV.xyxy + vSphereOffset_03 * UVOffsetFar.xyxy;
//
//		float4 vTex_04 = In.texUV.xyxy + vSphereOffset_00 * UVOffsetFar.xyxy;
//		float4 vTex_05 = In.texUV.xyxy + vSphereOffset_01 * UVOffsetFar.xyxy;
//		float4 vTex_06 = In.texUV.xyxy + vSphereOffset_02 * UVOffsetNear.xyxy;
//		float4 vTex_07 = In.texUV.xyxy + vSphereOffset_03 * UVOffsetNear.xyxy;
//
//		// we can do some work in parallel if we depend on AO_SAMPLE_COUNT = 8
//		float4 vDepthSampled[4];
//
//		vDepthSampled[0].x = readDepthTextureLinear(depth_tex, vTex_00.xy);
//		vDepthSampled[0].y = readDepthTextureLinear(depth_tex, vTex_00.zw);
//		vDepthSampled[0].z = readDepthTextureLinear(depth_tex, vTex_01.xy);
//		vDepthSampled[0].w = readDepthTextureLinear(depth_tex, vTex_01.zw);
//
//		vDepthSampled[1].x = readDepthTextureLinear(depth_tex, vTex_02.xy);
//		vDepthSampled[1].y = readDepthTextureLinear(depth_tex, vTex_02.zw);
//		vDepthSampled[1].z = readDepthTextureLinear(depth_tex, vTex_03.xy);
//		vDepthSampled[1].w = readDepthTextureLinear(depth_tex, vTex_03.zw);
//
//		vDepthSampled[2].x = readDepthTextureLinear(depth_tex, vTex_04.xy);
//		vDepthSampled[2].y = readDepthTextureLinear(depth_tex, vTex_04.zw);
//		vDepthSampled[2].z = readDepthTextureLinear(depth_tex, vTex_05.xy);
//		vDepthSampled[2].w = readDepthTextureLinear(depth_tex, vTex_05.zw);
//
//		vDepthSampled[3].x = readDepthTextureLinear(depth_tex, vTex_06.xy);
//		vDepthSampled[3].y = readDepthTextureLinear(depth_tex, vTex_06.zw);
//		vDepthSampled[3].z = readDepthTextureLinear(depth_tex, vTex_07.xy);
//		vDepthSampled[3].w = readDepthTextureLinear(depth_tex, vTex_07.zw);
//
//		float4 normalDots[4];
//		float3 normal;
//
//		normal = tex2Dlod(normal_tex, float4(vTex_00.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[0].x = dot(fragmentNormal, normal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_00.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[0].y = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_01.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[0].z = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_01.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[0].w = dot(fragmentNormal, normal);
//
////-------------
//
//		normal = tex2Dlod(normal_tex, float4(vTex_02.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[1].x = dot(fragmentNormal, normal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_02.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[1].y = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_03.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[1].z = dot(fragmentNormal, normal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_03.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[1].w = dot(fragmentNormal, normal);
//
////-------------
//
//		normal = tex2Dlod(normal_tex, float4(vTex_04.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[2].x = dot(fragmentNormal, normal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_04.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[2].y = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_05.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[2].z = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_05.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[2].w = dot(fragmentNormal, normal);
//
////-------------
//
//		normal = tex2Dlod(normal_tex, float4(vTex_06.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[3].x = dot(fragmentNormal, normal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_06.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[3].y = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_07.xy,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[3].z = dot(normal, fragmentNormal);
//
//		normal = tex2Dlod(normal_tex, float4(vTex_07.zw,0,0)).xyz* 2.0 - 1.0;
//		normal.z = -sqrt(1.0 - (normal.x*normal.x + normal.y*normal.y));
//		normalDots[3].w = dot(fragmentNormal, normal);
//
//
//		float4 vDepthPosition[4];
//		vDepthPosition[0] = fNearZ;
//		vDepthPosition[1] = fFarZ;
//		vDepthPosition[2] = fFarZ;
//		vDepthPosition[3] = fNearZ;
//				
//		// we keep track of the total ratio of blocking depth values,
//		// as well as the number of samples we will consider to have
//		// influence on the final result
//		float fTotalRatio = 0.0f;
//		float fNumSamples = 0.0f;
//
//		// XDC: Doing all calculations with 2 loops using float4 vectors for parallel calculations,
//		// rather than 8 loops with scalars. The use of step() and lerp() also gets rid of all
//		// branches in the loops
//		//$ATTRIB_UNROLL;
//		for ( int i = 0; i < 4; i++ )
//		{
//			float4 vDistance = vDepthPosition[i] - vDepthSampled[i];
//			float4 vDistanceGreaterEqualZero = step( 0.0f, vDistance );
//			
//			float4 vDistanceSmallerThanInfluenceDistance = step( vDistance, AO_INFLUENCE_DISTANCE );
//			//float4 vDistanceSmallerThanInfluenceDistance = step( abs(vDistance), AO_INFLUENCE_DISTANCE );
//			
//			// convert this value to a normalized range
//			float4 vNormalizedDistance = vDistance / AO_INFLUENCE_DISTANCE;
//
//			//// scale by an approximate PI value and offset
//			//vNormalizedDistance = vNormalizedDistance * 3.14 + 0.75;
//
//			//// this uses a taylor series to approximate sin(fDistance), this
//			//// curve allows points close to the surface to increase, then decrease
//			//// in the occlusion amount
//			//vNormalizedDistance = vNormalizedDistance - (vNormalizedDistance * vNormalizedDistance * vNormalizedDistance) * 0.16666667f;
//
//			//// cap negative values
//			//vNormalizedDistance = max(0.0f, vNormalizedDistance);
//
//			//float4 vLerp = lerp( 0.5f, vNormalizedDistance, vDistanceSmallerThanInfluenceDistance );
//			//					
//			//float4 vTotalRatio = vDistanceSmallerThanInfluenceDistance * vNormalizedDistance;
//			////vTotalRatio = pow(saturate(1 - normalDots[i]), 3);
//			//float4 vNumSamples = 1.0 + vLerp;
//			//
//			//vTotalRatio = lerp( 0.0f, vTotalRatio, vDistanceGreaterEqualZero );
//			//vNumSamples = lerp( 3.0f, vNumSamples, vDistanceGreaterEqualZero );
//
//	
//			float4 vTotalRatio = vDistanceGreaterEqualZero * vDistanceSmallerThanInfluenceDistance * vNormalizedDistance;
//			float4 vNumSamples = vDistanceGreaterEqualZero;// * vDistanceSmallerThanInfluenceDistance;
//
//			//vTotalRatio *= 1 - step(0.9, normalDots[i]);
//			//vTotalRatio = (1 - pow(saturate(normalDots[i]),1)) * vDistanceSmallerThanInfluenceDistance;
//
//			fTotalRatio += dot( vTotalRatio, 1 );
//
//			//fNumSamples += dot( vNumSamples, 1 );
//			fNumSamples += 4;
//		}
//
//		// divide by the number of samples and invert the result
//		if (fNumSamples>0)
//			fTotalRatio /= fNumSamples;
//		else
//			fTotalRatio = 0;
//
//
//		//fTotalRatio = pow(fTotalRatio, 2);
//
//		//fTotalRatio = 1.0f - fTotalRatio;
//
//		// the results are typically too dark. this will effectively map our
//		// range of results into the specified min and max values
//		//fTotalRatio = fTotalRatio - AO_INFLUENCE_MIN;
//		//fTotalRatio = fTotalRatio * AO_INFLUENCE_ONE_OVER_RANGE;
//
//		// a last ditch effort to increase the contrast
//		//fTotalRatio = fTotalRatio * fTotalRatio;
//		//fTotalRatio = min(1.0f, fTotalRatio);
//
//		// interpolate based on the fall off
//		//fTotalRatio = (fTotalRatio * fFallOff) + (1.0f - fFallOff);
//
//		ao = fTotalRatio * g_Params.b;
//	}
//
//	//return float4(0,0,0,ao);
//	ao=1-ao;
//	return float4(ao,ao,ao,1);
//}

#endif


// Pixel shader: samples depth_tex at uvCoords through readDepthTexture() and remaps
// the sample as
//     fs_projection_params.x / (1.0 - depth * fs_projection_params.w)
// The remapped value is returned as the float4 colour output.
//
// uvCoords : texture coordinate of the pixel being written (TEXCOORD0).
//
// NOTE(review): the formula looks like a conversion from stored (non-linear) depth
// to linear depth, written to a lower-resolution target - confirm against the
// definitions of readDepthTexture and fs_projection_params.
float4 DownsampleDepth_px(float2 uvCoords : TEXCOORD0) : COLOR
{
	// Depth sample exactly as delivered by the shared depth-read helper.
	float4 storedDepth = readDepthTexture(depth_tex, uvCoords);

	// Denominator of the remapping; kept separate so the formula reads left to right.
	float4 denominator = 1.0 - storedDepth * fs_projection_params.w;

	return fs_projection_params.x / denominator;
}

// Pixel shader: fetches normal_tex at uvCoords and returns the texel unchanged.
// No decoding or renormalisation is applied; the output has the same encoding as
// the source texture.
//
// uvCoords : texture coordinate of the pixel being written (TEXCOORD0).
//
// NOTE(review): any actual downsampling presumably comes from rendering into a
// smaller target with the sampler's filtering - confirm on the host side.
float4 DownsampleNormals_px(float2 uvCoords : TEXCOORD0) : COLOR
{
	float4 encodedNormal = tex2D(normal_tex, uvCoords);
	return encodedNormal;
}
